rm(list = ls())

library(readr)
library(ggplot2)
library(stargazer)
library(lme4)
library(dplyr)
library(haven)
library(texreg)
library(tidyr)

# variables:
#"onset_ko_flag":Binary flag indicating group-level conflict onset / ko option
# "onset_ko_terr_flag": binary flag indicating group-level territorial conflict onset  / ko option
# "onset_ko_gov_flag: binary flag indicating group-level governmental conflict onset  / ko option
# onset_do_flag": binary flag indicating group-level conflict onset: / do option
# "onset_do_terr_flag": binary flag indicating group-level territorial conflict onset  / do option  
# "onset_do_gov_flag": binary flag indicating group-level governmental conflict onset  / do option  


load("Final_JPR/data/paperdata.RData")
data <- paperdata %>%
       filter(tek_count>0)

source("Final_JPR/Code/coefplot.R")
source("Final_JPR/Code/marginal_effects.R")
source("Final_JPR/Code/setup.R")


####################################### Table 1 and Figure 2 ######################################################################
## dependent variable
dv <- 'onset_do_flag' 
## controls
ctrs <- c('lineq_total', 'status_excl','lsize','family_warhistdummy','family_downgraded2', 
          'ctygdppc_log', 'ctypop_log','peaceyears','peaceyears_sq','peaceyears_cub')

ivs_1 <- c('best_tlineq_total', 'tekbest_status_excl')
ivs_2 <- c('worst_tlineq_total', 'tekworst_status_excl')
ivs_3 <- c("median_tlineq_total",'tekmedian_status_excl')
ivs_4 <- c('nearst_tlineq_total','teknearst_status_excl')

f_do_1 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_1, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))
f_do_2 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_2, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))
f_do_3 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_3, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))
f_do_4 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_4, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))

#
ivs <- c('best_tlineq_total','worst_tlineq_total','median_tlineq_total','nearst_tlineq_total')


## using log of transnational inequality
model_do_log <- list()
for (i in 1:length(ivs)){
       model_do_log [[i]] <- glm(eval(parse(text = paste0('f_do_', i))), 
                                 data = data, family = binomial(link = "logit"))
}




p1 <- marginaleffect_logit(ModelResults = model_do_log, n.sim = 1000, 
                                 varname = c("best_tlineq_total", "worst_tlineq_total",
                                             "median_tlineq_total","nearst_tlineq_total"),
                                 data = data,
                                 val1 = 0, val2 =1, clusterid = "gwgroupid") 

p1 +  scale_x_continuous(breaks = 1:4,
                         labels = parse(text =rev(c( 
                                 "Nearest~TEK",
                                 "Median~TEK",
                                 "Worst~TEK",
                                 "Best~TEK")))) + ggtitle("")

# Figure 2-a
ggsave("Final_JPR/Figures/fig2-a.pdf", width = 4.8, height = 3.5, units='in', dpi=600)
ggsave("Final_JPR/Figures/fig2-a.jpg", width = 4.8, height = 3.5, units='in', dpi=600)


####################################### Model 1 across Tables A1-5 ######################################################################
## dependent variable
dv <- 'incidence_flag' 
## controls
ctrs <- c('lineq_total', 'status_excl','lsize','family_warhistdummy','family_downgraded2', 
          'ctygdppc_log', 'ctypop_log','peaceyears','peaceyears_sq','peaceyears_cub')

ivs_1 <- c('best_tlineq_total', 'tekbest_status_excl')
ivs_2 <- c('worst_tlineq_total', 'tekworst_status_excl')
ivs_3 <- c("median_tlineq_total",'tekmedian_status_excl')
ivs_4 <- c('nearst_tlineq_total','teknearst_status_excl')

f_do_1 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_1, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))
f_do_2 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_2, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))
f_do_3 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_3, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))
f_do_4 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_4, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))

ivs <- c('best_tlineq_total','worst_tlineq_total','median_tlineq_total','nearst_tlineq_total')


## using log of transnational inequality
model_incidence_log <- list()
for (i in 1:length(ivs)){
        model_incidence_log  [[i]] <- glm(eval(parse(text = paste0('f_do_', i))), 
                                  data = data, family = binomial(link = "logit"))
}

p2 <- marginaleffect_logit(ModelResults = model_incidence_log, n.sim = 1000, 
                           varname = c("best_tlineq_total", "worst_tlineq_total",
                                       "median_tlineq_total","nearst_tlineq_total"),
                           data = data,
                           val1 = 0, val2 =1, clusterid = "gwgroupid") 

p2 + scale_x_continuous(breaks = 1:4,
                        labels = parse(text =rev(c( 
                                "Nearest~TEK",
                                "Median~TEK",
                                "Worst~TEK",
                                "Best~TEK"))))+ ggtitle("")

ggsave("Final_JPR/Figures/fig2-b.pdf", width = 4.8, height = 3.5, units='in', dpi=600)
ggsave("Final_JPR/Figures/fig2-b.jpg", width = 4.8, height = 3.5, units='in', dpi=600)


## Results to  Table 1
modSD = cluster_se(ModelResults = c(model_do_log,model_incidence_log), data = data, 
                   clusterid = "gwgroupid") 
texreg(c(model_do_log,model_incidence_log), file = "Final_JPR/tables/table-1.tex",
       stars = c(0.001, 0.01, 0.05),
       override.se =  modSD,
       caption = "Logistic regression results for ethnic conflict (1992-2020)",
       caption.above = TRUE,
       label = "tab:tab1",
       scalebox = 0.7,
       use.packages = FALSE,
       custom.header = list("DV: Conflict Onset" = 1:4,
                            "DV: Conflict Prevalence" = 5:8),
       custom.model.names = c("M1:Best", "M2:Worst", "M3:Median", "M4:Nearest",
                              "M5:Best", "M6:Worst", "M7:Median", "M8:Nearest"),
       custom.coef.map = list("best_tlineq_total" = "Transnational inequality",
                              "tekbest_status_excl" =  "TEK status excluded",
                             "lineq_total" = "Horizontal inequality",
                             "status_excl" =  "Status excluded",  
                             "lsize " =  "Relative group size",
                             "family_warhistdummy" =  "Previous rebellions",
                             "family_downgraded2" = "Status downgraded",
                             "ctygdppc_log" = "Ln(Country GDP per capita)",
                             "ctypop_log" = "Ln(Country population)",
                             "peaceyears" = "Peace year",
                             "peaceyears_sq" =  "Peace year$^2$",
                             "peaceyears_cub" = "Peace year$^2$",
                             "worst_tlineq_total" = "Transnational inequality",
                              "tekworst_status_excl" =  "TEK status excluded",
                             "median_tlineq_total" = "Transnational inequality",
                             "tekmedian_status_excl"  =  "TEK status excluded",
                             "nearst_tlineq_total" = "Transnational inequality",
                              "teknearst_status_excl" ="TEK status excluded",
                             "(Intercept)" = "Intercept"),
       digits = 3)




####################################### Figure 3 and Table A1 （relative poverty and relative wealth) ######################################################################

## robustness checks: relative terms

## dependent variable
dv <- 'onset_do_flag' 
## controls
ctrs <- c('lineq_total', 'status_excl','lsize','family_warhistdummy','family_downgraded2', 
          'ctygdppc_log', 'ctypop_log','peaceyears','peaceyears_sq','peaceyears_cub')

ivs_1 <- c('best_low_tlineq', 'best_high_tlineq', 'tekbest_status_excl')
ivs_2 <- c('worst_low_tlineq', 'worst_high_tlineq', 'tekworst_status_excl')
ivs_3 <- c('median_low_tlineq', 'median_high_tlineq','tekmedian_status_excl')
ivs_4 <- c('nearst_low_tlineq', 'nearst_high_tlineq','teknearst_status_excl')


f_do_1 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_1, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))
f_do_2 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_2, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))
f_do_3 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_3, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))
f_do_4 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_4, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))

#
ivs <- c('best','worst','median','nearst')


## using log of transnational inequality
model_onset_relative <- list()
for (i in 1:length(ivs)){
        model_onset_relative[[i]] <- glm(eval(parse(text = paste0('f_do_', i))), 
                                         data = data, family = binomial(link = "logit"))
}


## dependent variable
dv <- 'incidence_flag' 
## controls
ctrs <- c('lineq_total', 'status_excl','lsize','family_warhistdummy','family_downgraded2', 
          'ctygdppc_log', 'ctypop_log','peaceyears','peaceyears_sq','peaceyears_cub')

ivs_1 <- c('best_low_tlineq', 'best_high_tlineq', 'tekbest_status_excl')
ivs_2 <- c('worst_low_tlineq', 'worst_high_tlineq', 'tekworst_status_excl')
ivs_3 <- c('median_low_tlineq', 'median_high_tlineq','tekmedian_status_excl')
ivs_4 <- c('nearst_low_tlineq', 'nearst_high_tlineq','teknearst_status_excl')


f_do_1 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_1, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))
f_do_2 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_2, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))
f_do_3 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_3, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))
f_do_4 <- formula(paste0(dv, ' ~ ', paste(paste(ivs_4, collapse=' + '), paste(ctrs, collapse=' + '), sep =' + ')))

#
ivs <- c('best','worst','median','nearst')


## using log of transnational inequality
model_incidence_relative <- list()
for (i in 1:length(ivs)){
        model_incidence_relative[[i]] <- glm(eval(parse(text = paste0('f_do_', i))), 
                                             data = data, family = binomial(link = "logit"))
}




p1_1 <- marginaleffect_logit2(ModelResults =  model_incidence_relative, n.sim = 1000, 
                           varname = c("best_low_tlineq", "worst_low_tlineq",
                                       "median_low_tlineq","nearst_low_tlineq"),
                           data = data,
                           dv = "relative_poverty",
                           val1 = 0, val2 =1, clusterid = "gwgroupid") 


p1_2 <- marginaleffect_logit2(ModelResults =  model_incidence_relative, n.sim = 1000, 
                              varname = c("best_high_tlineq", "worst_high_tlineq",
                                          "median_high_tlineq","nearst_high_tlineq"),
                              data = data,
                              dv = "relative_wealth",
                              val1 = 0, val2 =1, clusterid = "gwgroupid") 


df_relative_incidence <- bind_rows(p1_1, p1_2)



p = ggplot(df_relative_incidence , aes(colour = dv)) + 
        geom_hline(yintercept = 0, lty = 2) +
        geom_linerange(aes(x = id, ymin = low,
                           ymax = high),
                       lwd = 1, position = position_dodge(width = 1/2)) +
        geom_pointrange(aes(x = id, y =median, ymin =low,
                            ymax =high, shape = dv),
                        fatten = 7,
                        lwd = 1, position = position_dodge(width = 1/2),
                        fill = "WHITE")+
        geom_text(aes(x = id +0.2 ,
                      y = median,
                      label = format(round(median, 2))),position = position_dodge(width = 1/2)) +
        theme_bw() + xlab("") + ylab("First Difference in Predicted Prob.") + 
        theme(legend.position="bottom",
              legend.title=element_blank(),
              legend.text = element_text(colour="blue", size=12), #blue
              legend.key.height = unit(1, 'cm'),  
              axis.text= element_text(size=12),
              text = element_text(size=12),
              plot.title = element_text(hjust = .5, size = 12))+
        ggtitle("")+
       #scale_colour_manual(values = c("black", "black"), labels = rev(c("Relative Wealth","Relative poverty")))+
       scale_colour_discrete(labels = rev(c("Relative Wealth","Relative poverty")))+
       scale_shape_manual(values = c(16,17),labels = rev(c("Relative Wealth","Relative poverty")) )+
       guides(color = guide_legend(nrow = 1),
              shape = guide_legend(nrow = 1, byrow = TRUE,
                                   override.aes = list(size = 1))
       )+
        scale_x_continuous(breaks = 1:4,labels =parse(text =rev(c( 
                "Nearest~TEK",
                "Median~TEK",
                "Worst~TEK",
                "Best~TEK")))) 
ggsave("Final_JPR/Figures/fig3-b.pdf", width = 5.5, height = 3.5, units='in', dpi=600)
ggsave("Final_JPR/Figures/fig3-b.jpg", width = 5.5, height = 3.5, units='in', dpi=600)
#ggsave("Final_JPR/Figures/black_figures/fig3-b.jpg", width = 5.5, height = 3.5, units='in', dpi=600)

###

p2_1 <- marginaleffect_logit2(ModelResults =  model_onset_relative, n.sim = 1000, 
                              varname = c("best_low_tlineq", "worst_low_tlineq",
                                          "median_low_tlineq","nearst_low_tlineq"),
                              data = data,
                              dv = "relative_poverty",
                              val1 = 0, val2 =1, clusterid = "gwgroupid") 


p2_2 <- marginaleffect_logit2(ModelResults =  model_onset_relative, n.sim = 1000, 
                              varname = c("best_high_tlineq", "worst_high_tlineq",
                                          "median_high_tlineq","nearst_high_tlineq"),
                              data = data,
                              dv = "relative_wealth",
                              val1 = 0, val2 =1, clusterid = "gwgroupid") 


df_relative_onset <- bind_rows(p2_1, p2_2)



p = ggplot(df_relative_onset , aes(colour = dv)) + 
        geom_hline(yintercept = 0, lty = 2) +
        geom_linerange(aes(x = id, ymin = low,
                           ymax = high),
                       lwd = 1, position = position_dodge(width = 1/2)) +
        geom_pointrange(aes(x = id, y =median, ymin =low,
                            ymax =high, shape = dv),
                        fatten = 7,
                        lwd = 1, position = position_dodge(width = 1/2),
                        fill = "WHITE")+
        geom_text(aes(x = id +0.2 ,
                      y = median,
                      label = format(round(median, 2))),position = position_dodge(width = 1/2)) +
        theme_bw() + xlab("") + ylab("First Difference in Predicted Prob.") + 
        theme(legend.position="bottom",
              legend.title=element_blank(),
              legend.text = element_text(colour="blue", size=12),#black
              legend.key.height = unit(1, 'cm'),  
              axis.text= element_text(size=12),
              text = element_text(size=12),
              plot.title = element_text(hjust = .5, size = 12))+
        ggtitle("")+
      #scale_colour_manual(values = c("black", "black"), labels = rev(c("Relative Wealth","Relative poverty")))+
       scale_colour_discrete(labels = rev(c("Relative Wealth","Relative poverty")))+
        scale_shape_manual(values = c(16,17),labels = rev(c("Relative Wealth","Relative poverty")) )+
       guides(color = guide_legend(nrow = 1),
              shape = guide_legend(nrow = 1, byrow = TRUE,
                                   override.aes = list(size = 1))
       )+
        scale_x_continuous(breaks = 1:4,labels =parse(text =rev(c( 
                "Nearest~TEK",
                "Median~TEK",
                "Worst~TEK",
                "Best~TEK")))) 
ggsave("Final_JPR/Figures/fig3-a.pdf", width = 5.5, height = 3.5, units='in', dpi=600)
ggsave("Final_JPR/Figures/fig3-a.jpg", width = 5.5, height = 3.5, units='in', dpi=600)
#ggsave("Final_JPR/Figures/black_figures/fig3-a.jpg", width = 5.5, height = 3.5, units='in', dpi=600)


######## Appendix Table A1

modSD = cluster_se(ModelResults = c(model_onset_relative, model_incidence_relative), data = data, 
                   clusterid = "gwgroupid") 

texreg(c(model_onset_relative, model_incidence_relative ), file = "Final_JPR/tables/Table_A1.tex",
       stars = c(0.001, 0.01, 0.05),
       override.se =  modSD,
       caption = "Logistic regression results of upward and downward comparisons for ethnic conflict (1992-2020)",
       caption.above = TRUE,
       label = "tab:tab5",
       scalebox = 0.7,
       use.packages = FALSE,
       custom.header = list("DV: Conflict Onset" = 1:4,
                            "DV: Conflict Prevalence" = 5:8),
       custom.model.names = c("M1:Best", "M2:Worst", "M3:Median", "M4:Nearest",
                              "M5:Best", "M6:Worst", "M7:Median", "M8:Nearest"),
       custom.coef.map = list("best_low_tlineq" = "Relative poverty",
                              "best_high_tlineq" = "Relative wealth",
                              "tekbest_status_excl" =  "TEK status excluded",
                              "lineq_total" = "Horizontal inequality",
                              "status_excl" =  "Status excluded",  
                              "lsize " =  "Relative group size",
                              "family_warhistdummy" =  "Previous rebellions",
                              "family_downgraded2" = "Status downgraded",
                              "ctygdppc_log" = "Ln(Country GDP per capita)",
                              "ctypop_log" = "Ln(Country population)",
                              "worst_low_tlineq" ="Relative poverty",
                              "worst_high_tlineq" = "Relative wealth",
                              "tekworst_status_excl" =  "TEK status excluded",
                              "median_low_tlineq" = "Relative poverty",
                              "median_high_tlineq" =  "Relative wealth",
                              "median_tlineq_total" = "Transnational inequality",
                              "tekmedian_status_excl"  =  "TEK status excluded",
                              "nearst_low_tlineq" ="Relative poverty",
                              "nearst_high_tlineq" =  "Relative wealth",
                              "teknearst_status_excl" ="TEK status excluded",
                              "peaceyears" = "Peace year",
                              "peaceyears_sq" =  "Peace year $^2$",
                              "peaceyears_cub" = "Peace year $^2$",
                              "(Intercept)" = "Intercept"),
       digits = 3)


